Chapter 7 Community composition

7.1 Metagenomics

load("resources/metagenomics/data_filtered.Rdata")

7.1.1 Taxonomy overview

Number of Archaea phyla

genome_metadata %>% 
  filter(domain == "Archaea")%>%
  dplyr::select(phylum) %>%
  unique() %>%
  pull() %>%
  length()%>% 
  cat()
0

Number of Bacteria phyla

genome_metadata %>% 
  filter(domain == "Bacteria")%>%
  dplyr::select(phylum) %>%
  unique() %>%
  pull() %>%
  length()%>% 
  cat()
15

7.1.1.1 Phylum level

genome_counts_filt %>%
  mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>% #reduce to minimum number of columns
  left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
  left_join(., sample_metadata, by = join_by(sample == sample)) %>% #append sample metadata
  filter(count > 0) %>% #filter 0 counts
  ggplot(., aes(x=sample,y=count, fill=phylum, group=phylum)) + #grouping enables keeping the same sorting of taxonomic units
    geom_bar(stat="identity", colour="white", linewidth=0.1) + #plot stacked bars with white borders
    scale_fill_manual(values=phylum_colors) +
    facet_nested(~factor(Species, labels=c("Eb" = "Eptesicus", "Ha" = "Hypsugo", "Pk" = "Pipistrellus")),  scales="free") + #facet per day and treatment
    guides(fill = guide_legend(ncol = 1)) +
    theme(axis.text.x = element_blank(),
          axis.ticks.x = element_blank(),
          axis.title.x = element_blank(),
          panel.background = element_blank(),
          panel.border = element_blank(),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          strip.background = element_rect(fill = "white"),
          strip.text = element_text(size = 12, lineheight = 0.6,face="bold"),
          axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black")) +
   labs(fill="Phylum",y = "Relative abundance",x="Samples")

Phylum relative abundances

phylum_summary <- genome_counts_filt %>%
  mutate_at(vars(-genome),~./sum(.)) %>%
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
  left_join(sample_metadata, by = join_by(sample == sample)) %>%
  left_join(genome_metadata, by = join_by(genome == genome)) %>%
  group_by(sample,phylum,Species, Region) %>%
  summarise(relabun=sum(count))
phylum_summary %>%
    group_by(phylum) %>%
    summarise(Total_mean=mean(relabun*100, na.rm=T),
              Total_sd=sd(relabun*100, na.rm=T),
              Eb_mean=mean(relabun[Species=="Eb"]*100, na.rm=T),
              Eb_sd=sd(relabun[Species=="Eb"]*100, na.rm=T),
              Ha_mean=mean(relabun[Species=="Ha"]*100, na.rm=T),
              Ha_sd=sd(relabun[Species=="Ha"]*100, na.rm=T),
              Pk_mean=mean(relabun[Species=="Pk"]*100, na.rm=T),
              Pk_sd=sd(relabun[Species=="Pk"]*100, na.rm=T)) %>%
    mutate(Total_meta=str_c(round(Total_mean,3),"±",round(Total_sd,3)),
           Eptesicus_meta=str_c(round(Eb_mean,3),"±",round(Eb_sd,3)),
           Hypsugo_meta=str_c(round(Ha_mean,3),"±",round(Ha_sd,3)),
           Pipistrellus_meta=str_c(round(Pk_mean,3),"±",round(Pk_sd,3))) %>% 
  arrange(-Eb_mean)%>% 
    dplyr::select(phylum,Total_meta,Eptesicus_meta,Hypsugo_meta,Pipistrellus_meta)
# A tibble: 15 × 5
   phylum           Total_meta    Eptesicus_meta Hypsugo_meta  Pipistrellus_meta
   <chr>            <chr>         <chr>          <chr>         <chr>            
 1 Pseudomonadota   68.255±37.904 63.683±35.64   89.049±29.599 52.374±38.209    
 2 Bacillota        16.181±29.125 15.147±28.225  5.375±19.493  25.984±33.849    
 3 Desulfobacterota 3.981±10.582  7.227±13.106   0±0           5.944±13.024     
 4 Bacteroidota     6.774±17.384  5.695±9.903    5.263±22.942  8.569±14.844     
 5 Bacillota_A      1.545±4.266   2.215±4.627    0±0           2.576±5.538      
 6 Fusobacteriota   0.694±1.785   1.818±3.061    0±0           0.781±1.589      
 7 Campylobacterota 1.288±6.836   1.731±2.428    0±0           2.198±10.309     
 8 Elusimicrobiota  0.141±0.755   0.72±1.643     0±0           0±0              
 9 Synergistota     0.405±1.282   0.562±1.116    0±0           0.684±1.771      
10 Planctomycetota  0.086±0.454   0.44±0.985     0±0           0±0              
11 Deferribacterota 0.08±0.4      0.407±0.861    0±0           0±0              
12 Bacillota_C      0.136±0.36    0.354±0.531    0±0           0.154±0.385      
13 Actinomycetota   0.075±0.534   0±0            0.201±0.876   0±0              
14 Cyanobacteriota  0.318±1.537   0±0            0±0           0.737±2.302      
15 Spirochaetota    0.041±0.296   0±0            0.111±0.486   0±0              

7.1.1.2 Family level

Percentange of families in each group

family_summary <- genome_counts_filt %>%
  mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>% #reduce to minimum number of columns
  left_join(sample_metadata, by = join_by(sample == sample)) %>% #append sample metadata
  left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
  group_by(sample,family, Species) %>%
  summarise(relabun=sum(count))
family_summary %>%
    group_by(family) %>%
    summarise(Eb_mean=mean(relabun[Species=="Eb"]*100, na.rm=T),
              Eb_sd=sd(relabun[Species=="Eb"]*100, na.rm=T),
              Ha_mean=mean(relabun[Species=="Ha"]*100, na.rm=T),
              Ha_sd=sd(relabun[Species=="Ha"]*100, na.rm=T),
              Pk_mean=mean(relabun[Species=="Pk"]*100, na.rm=T),
              Pk_sd=sd(relabun[Species=="Pk"]*100, na.rm=T)) %>%
    mutate(Eptesicus=str_c(round(Eb_mean,3),"±",round(Eb_sd,3)),
           Hypsugo=str_c(round(Ha_mean,3),"±",round(Ha_sd,3)),
           Pipistrellus=str_c(round(Pk_mean,3),"±",round(Pk_sd,3))) %>%
  arrange(-Eb_mean, -Ha_mean) %>%
  dplyr::select(family,Eptesicus,Hypsugo,Pipistrellus)
# A tibble: 55 × 4
   family              Eptesicus    Hypsugo       Pipistrellus 
   <chr>               <chr>        <chr>         <chr>        
 1 Diplorickettsiaceae 33.448±40.33 24.089±41.769 10.097±24.365
 2 Enterobacteriaceae  17.987±30.59 10.107±20.975 18.469±28.031
 3 Mycoplasmataceae    8.829±18.638 4.422±19.277  0±0          
 4 Vibrionaceae        7.534±23.723 5.863±11.437  5.576±18.175 
 5 Desulfovibrionaceae 6.023±11.116 0±0           4.561±9.963  
 6 Enterococcaceae     4.692±10.65  0.602±2.624   3.243±11.687 
 7 Dysgonomonadaceae   2.721±6.637  0±0           3.84±7.171   
 8 Aeromonadaceae      1.626±4.965  5.844±13.778  0±0          
 9 Leptotrichiaceae    1.611±3.041  0±0           0.733±1.597  
10 Helicobacteraceae   1.544±2.452  0±0           2.198±10.309 
# ℹ 45 more rows
family_arrange <- family_summary %>%
    group_by(family) %>%
    summarise(mean=sum(relabun)) %>%
    arrange(-mean) %>%
    select(family) %>%
    pull()

# Per environment
family_summary %>%
    left_join(genome_metadata %>% select(family,phylum) %>% unique(),by=join_by(family==family)) %>%
    filter(family %in% family_arrange[1:20]) %>%
    mutate(family=factor(family,levels=rev(family_arrange[1:20]))) %>%
    filter(relabun > 0) %>%
    ggplot(aes(x=relabun, y=family, group=family, color=phylum)) +
        scale_color_manual(values=phylum_colors[-8]) +
        geom_jitter(alpha=0.5) + 
        facet_grid(.~Species)+
        theme_minimal() + 
        labs(y="Family", x="Relative abundance", color="Phylum")

7.1.1.3 Genus level

Percetange of genera in each group

genus_summary <- genome_counts_filt %>%
  mutate_at(vars(-genome),~./sum(.)) %>%
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
  left_join(sample_metadata, by = join_by(sample == sample)) %>% 
  left_join(genome_metadata, by = join_by(genome == genome)) %>%
  group_by(sample,phylum,genus, Species) %>%
  summarise(relabun=sum(count)) 

genus_summary %>%
  group_by(genus) %>%
  summarise(Eb_mean=mean(relabun[Species=="Eb"]*100, na.rm=T),
            Eb_sd=sd(relabun[Species=="Eb"]*100, na.rm=T),
            Ha_mean=mean(relabun[Species=="Ha"]*100, na.rm=T),
            Ha_sd=sd(relabun[Species=="Ha"]*100, na.rm=T),
            Pk_mean=mean(relabun[Species=="Pk"]*100, na.rm=T),
            Pk_sd=sd(relabun[Species=="Pk"]*100, na.rm=T)) %>%
  mutate(Eptesicus=str_c(round(Eb_mean,3),"±",round(Eb_sd,3)),
         Hypsugo=str_c(round(Ha_mean,3),"±",round(Ha_sd,3)),
         Pipistrellus=str_c(round(Pk_mean,3),"±",round(Pk_sd,3))) %>% 
  arrange(-Eb_mean, -Ha_mean) %>%
  dplyr::select(genus,Eptesicus,Hypsugo,Pipistrellus)
# A tibble: 72 × 4
   genus                Eptesicus    Hypsugo       Pipistrellus 
   <chr>                <chr>        <chr>         <chr>        
 1 Aquirickettsiella    33.448±40.33 24.089±41.769 10.097±24.365
 2 Spiroplasma          8.747±18.476 0±0           0±0          
 3 Vibrio               7.534±23.723 5.863±11.437  5.576±18.175 
 4 Jejubacter           4.268±9      0±0           0±0          
 5 Escherichia          3.416±10.802 0±0           0.317±1.488  
 6 Pseudocitrobacter    3.293±9.507  0±0           0.006±0.028  
 7 Frigididesulfovibrio 2.803±4.353  0±0           1.129±2.876  
 8 Enterococcus         2.769±5.838  0.602±2.624   3.243±11.687 
 9 Dysgonomonas         2.721±6.637  0±0           3.84±7.171   
10 Serratia             2.389±7.462  6.416±19.813  2.331±10.5   
# ℹ 62 more rows
genus_summary_sort <- genus_summary %>%
    group_by(genus) %>%
    summarise(mean=mean(relabun, na.rm=T),sd=sd(relabun, na.rm=T)) %>%
    arrange(-mean) 

genus_summary %>%
  mutate(genus=factor(genus, levels=rev(genus_summary_sort %>% pull(genus)))) %>%
  filter(relabun > 0) %>%
  ggplot(aes(x=relabun, y=genus, group=genus, color=phylum)) +
  scale_color_manual(values=phylum_colors) +
  geom_jitter(alpha=0.5) + 
  facet_grid(.~Species)+
  theme_minimal() + 
  theme(axis.text.y = element_text(size=6))+
  labs(y="Genera", x="Relative abundance", color="Phylum")

Number of mags and distinct taxonomy

bats=c("Eb", "Pk", "Ha")

total_mags <- data.frame(
  Bat = character(),
  MAGs = numeric(), 
  Phylum = numeric(),
  Family = numeric(),
  Genus = numeric()
)

preabs_table <- genome_counts_filt %>%
  mutate(across(-genome, ~ . / sum(.))) %>%
  column_to_rownames("genome") %>%
  mutate(across(everything(), ~ as.integer(. > 0))) %>%
  t() %>%
  as.data.frame() %>%
  rownames_to_column("sample") %>%
  left_join(sample_metadata[c("sample", "Species")], by = "sample") %>%
  group_by(Species) %>%
  summarize(across(-sample, sum), .groups = "drop") %>%
  column_to_rownames("Species") %>%
  t() %>%
  as.data.frame() %>%
  rownames_to_column("genome")  %>% 
  left_join(genome_metadata, by=join_by("genome"=="genome"))

phylum <- preabs_table %>% 
  distinct(phylum)

family <- preabs_table %>% 
  distinct(phylum, class, order, family)

genus <- preabs_table %>% 
  distinct(phylum, class, order, family, genus)

 total_mags <- rbind(
    total_mags,
    data.frame(
      Bat = "Total",
      MAGs = nrow(preabs_table),
      Phylum = nrow(phylum),
      Family = nrow(family),
      Genus = nrow(genus)
      )
  )

for (bat in bats) { 
  number <- preabs_table %>% 
    select({{bat}}) %>% 
    filter(.>=1)
  
  phylum <- preabs_table %>% 
     select({{bat}}, phylum) %>%
     filter(!!sym(bat)>=1) %>% 
     distinct(phylum)
  
  family <- preabs_table %>% 
     select({{bat}}, phylum, class, order, family) %>%
     filter(!!sym(bat)>=1) %>% 
     distinct(phylum, class, order, family)
  
  genus <- preabs_table %>% 
     select({{bat}}, phylum, class, order, family, genus) %>%
     filter(!!sym(bat)>=1) %>% 
     distinct(phylum, class, order, family, genus)
   
  total_mags <- rbind(
    total_mags,
    data.frame(
      Bat = bat,
      MAGs = nrow(number),
      Phylum = nrow(phylum),
      Family = nrow(family),
      Genus = nrow(genus)
      )
  )
}
bats=c("Eb", "Pk", "Ha")

no_annotation <- data.frame(
  Bat = character(),
  No_genus = numeric(), 
  No_species = numeric()
)

preabs_table <- genome_counts_filt %>%
  mutate(across(-genome, ~ . / sum(.))) %>%
  column_to_rownames("genome") %>%
  mutate(across(everything(), ~ as.integer(. > 0))) %>%
  t() %>%
  as.data.frame() %>%
  rownames_to_column("sample") %>%
  left_join(sample_metadata[c("sample", "Species")], by = "sample") %>%
  group_by(Species) %>%
  summarize(across(-sample, sum), .groups = "drop") %>%
  column_to_rownames("Species") %>%
  t() %>%
  as.data.frame() %>%
  rownames_to_column("genome")  %>% 
  left_join(genome_metadata, by=join_by("genome"=="genome"))

genus <- preabs_table %>%
  filter(genus == "")

species <- preabs_table %>%
  filter(species == "")

 no_annotation <- rbind(
    no_annotation,
    data.frame(
      Bat = "Total",
      No_genus = nrow(genus),
      No_species = nrow(species)
      )
  )

for (bat in bats) { 
  number <- preabs_table %>% 
    select({{bat}}) %>% 
    filter(.>=1)
  
  genus <- preabs_table %>% 
     select({{bat}}, phylum, class, order, family, genus) %>%
     filter(!!sym(bat)>=1) %>%
    filter(genus == "")
  
  species <- preabs_table %>% 
     filter(!!sym(bat)>=1) %>%
    filter(species == "")
   
  no_annotation <- rbind(
    no_annotation,
    data.frame(
      Bat = bat,
      No_genus = nrow(genus),
      No_species = nrow(species)
      )
  )
}

Total percentage of MAGs without genus-level annotation

nongenera <- genome_metadata %>%
  filter(genus == "") %>%
  summarize(Mag_nogenera = n()) %>% 
  pull()
nmags <- total_mags %>% 
  filter(Bat=="Total") %>% 
  select(MAGs) %>% 
  pull()
perct <- nongenera*100/nmags
cat(perct)
20.74074

Percentage of MAGs without genus-level annotation by phylum

total_mag_phylum <- genome_metadata %>%
  group_by(phylum) %>%
  summarize(Total_MAGs = n())
genome_metadata %>%
  filter(genus == "") %>%
  group_by(phylum) %>%
  summarize(MAGs_nogenus = n()) %>% 
  left_join(total_mag_phylum, by = join_by(phylum == phylum)) %>% 
  mutate(Percentage_nogenus=100*MAGs_nogenus/Total_MAGs) %>% 
  tt()
phylum MAGs_nogenus Total_MAGs Percentage_nogenus
Bacillota 2 14 14.285714
Bacillota_A 8 19 42.105263
Bacillota_C 1 1 100.000000
Bacteroidota 1 19 5.263158
Campylobacterota 1 3 33.333333
Deferribacterota 2 2 100.000000
Pseudomonadota 12 51 23.529412
Synergistota 1 1 100.000000

Number of bacterial species

genome_metadata %>% 
  filter(domain == "Bacteria")%>%
  dplyr::select(species) %>%
  unique() %>%
  pull() %>%
  length() %>% 
  cat()
37

Total percentage of MAGs without species-level annotation

nonspecies <- genome_metadata %>%
  filter(species == "") %>%
  summarize(Mag_nospecies = n()) %>% 
  pull()
perct <- nonspecies*100/nmags
cat(perct)
72.59259

MAGs without species-level annotation

total_mag_phylum <- genome_metadata %>%
  group_by(phylum) %>%
  summarize(MAGs_total = n())
genome_metadata %>%
  filter(species == "") %>%
  group_by(phylum) %>%
  summarize(MAGs_nospecies = n()) %>% 
  left_join(total_mag_phylum, by = join_by(phylum == phylum)) %>%
  mutate(species_annotated=MAGs_total-MAGs_nospecies) %>% 
  mutate(Percentage_nospecies=100*MAGs_nospecies/MAGs_total) %>% 
  mutate(Percentage_species=100-100*MAGs_nospecies/MAGs_total)%>% 
  tt()
phylum MAGs_nospecies MAGs_total species_annotated Percentage_nospecies Percentage_species
Actinomycetota 1 1 0 100.00000 0.000000
Bacillota 10 14 4 71.42857 28.571429
Bacillota_A 18 19 1 94.73684 5.263158
Bacillota_C 1 1 0 100.00000 0.000000
Bacteroidota 13 19 6 68.42105 31.578947
Campylobacterota 3 3 0 100.00000 0.000000
Cyanobacteriota 2 2 0 100.00000 0.000000
Deferribacterota 2 2 0 100.00000 0.000000
Desulfobacterota 14 14 0 100.00000 0.000000
Elusimicrobiota 4 4 0 100.00000 0.000000
Fusobacteriota 1 2 1 50.00000 50.000000
Planctomycetota 1 1 0 100.00000 0.000000
Pseudomonadota 27 51 24 52.94118 47.058824
Synergistota 1 1 0 100.00000 0.000000
bats=c("Eb", "Pk", "Ha")

# Initialize an empty results data frame
single_sp <- data.frame(
  Bat = character(),
  Single_species = numeric()
)

table_upset_analysis <- genome_counts_filt %>%
  mutate(across(-genome, ~ . / sum(.))) %>%
  column_to_rownames("genome") %>%
  mutate(across(everything(), ~ as.integer(. > 0))) %>%
  t() %>%
  as.data.frame() %>%
  rownames_to_column("sample") %>%
  left_join(sample_metadata[c("sample", "Species")], by = "sample") %>%
  group_by(Species) %>%
  summarize(across(-sample, sum), .groups = "drop") %>%
  column_to_rownames("Species") %>%
  mutate(across(everything(), ~ as.integer(. > 0))) %>% 
  t() %>%
  as.data.frame() %>%
  rownames_to_column("sample")

unique_all <- table_upset_analysis %>% 
      filter(rowSums(across(Eb:Pk)) == 1)

    single_sp <- rbind(
    single_sp,
    data.frame(
      Bat = "Total",  # Label for the total value
      Single_species = nrow(unique_all) # Aggregate sum of column sums
    )
  )
  
  for (bat in bats) {  
    unique <- table_upset_analysis %>%
      filter(rowSums(across(Eb:Pk)) == 1) %>% 
      select(all_of(bat)) %>% 
      filter(.>0) %>% 
      nrow()
    # Add results to the results data frame
    single_sp <- rbind(
      single_sp,
      data.frame(
        Bat = bat,
        Single_species = unique # Aggregate sum of column sums
      )
    )
  }
single_ind <- data.frame(
  Bat = character(),
  Single_individual = numeric()
)

freq_table <- genome_counts_filt %>%
  mutate(across(-genome, ~ . / sum(.))) %>%
  column_to_rownames("genome") %>%
  mutate(across(everything(), ~ as.integer(. > 0))) %>%
  t() %>%
  as.data.frame() %>%
  rownames_to_column("sample") %>%
  left_join(sample_metadata[c("sample", "Species")], by = "sample") %>%
  group_by(Species) %>%
  summarize(across(-sample, sum), .groups = "drop") %>%
  column_to_rownames("Species") %>%
  t() %>%
  as.data.frame() %>%
  rownames_to_column("asv")

 singleton_filt <- freq_table %>%
  rowwise() %>%
  mutate(row_sum = sum(c_across(-asv))) %>% # Calculate row sum for specific columns
  filter(row_sum == 1) %>% 
  column_to_rownames(var = "asv")  %>% 
  filter(row_sum==1)

     single_ind <- rbind(
      single_ind,
      data.frame(
        Bat = "Total",
        Single_individual = nrow(singleton_filt) # Aggregate sum of column sums
      )
    )
     
for (bat in bats) {
  singleton_filt <- freq_table %>%
    rowwise() %>%
    mutate(row_sum = sum(c_across(-asv))) %>% 
    filter(row_sum == 1) %>% 
    column_to_rownames(var = "asv")  %>% 
    select(bat) %>% 
    filter(.==1)

      single_ind <- rbind(
      single_ind,
      data.frame(
        Bat = bat,
        Single_individual = nrow(singleton_filt) # Aggregate sum of column sums
      )
    )
  }

7.2 Summary table

summary_table <- total_mags %>% 
  left_join(., no_annotation, by="Bat") %>% 
  left_join(., single_ind, by="Bat") %>% 
  left_join(., single_sp, by="Bat")
summary_table
    Bat MAGs Phylum Family Genus No_genus No_species Single_individual Single_species
1 Total  135     15     58    91       28         98                34             82
2    Eb   92     12     43    63       20         73                14             49
3    Pk   69     10     38    54       12         44                11             19
4    Ha   30      5     17    24        5         13                 9             14
summary_table %>% 
  select(-Phylum,-Family, -Genus) %>% 
  rowwise() %>% 
  mutate(No_genus_perc=No_genus*100/MAGs)%>% 
  mutate(No_species_perc=No_species*100/MAGs) %>% 
  mutate(Single_individual_perc=Single_individual*100/MAGs)%>% 
  mutate(Single_species_perc=Single_species*100/MAGs) %>% 
  mutate(Single_individual_per_Single_species=Single_individual*100/Single_species) %>% 
  select(1,6:11)
# A tibble: 4 × 7
# Rowwise: 
  Bat   Single_species No_genus_perc No_species_perc Single_individual_perc Single_species_perc Single_individual_per_Single_species
  <chr>          <int>         <dbl>           <dbl>                  <dbl>               <dbl>                                <dbl>
1 Total             82          20.7            72.6                   25.2                60.7                                 41.5
2 Eb                49          21.7            79.3                   15.2                53.3                                 28.6
3 Pk                19          17.4            63.8                   15.9                27.5                                 57.9
4 Ha                14          16.7            43.3                   30                  46.7                                 64.3

7.3 Amplicon

7.3.1 Taxonomy overview

load("resources/amplicon/data_nocopyfilt.Rdata")
genome_metadata <- genome_metadata %>%
  filter(!is.na(phylum))

genome_counts_filt <- genome_counts_filt %>% 
  filter(genome %in% genome_metadata$genome)%>%
  mutate_at(vars(-genome),~./sum(.))%>% 
  column_to_rownames(., "genome") %>% 
  .[,colSums(.)>0.00005] %>%
  filter(rowSums(across(everything())) != 0) %>% 
  rownames_to_column(., "genome")

genome_metadata <- genome_metadata %>% 
  filter(genome %in% genome_counts_filt$genome)

7.3.1.1 Phylum level

genome_counts_filt %>%
  mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>% #reduce to minimum number of columns
  left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
  left_join(., sample_metadata, by = join_by(sample == sample)) %>% #append sample metadata
  filter(count > 0) %>% #filter 0 counts
  ggplot(., aes(x=sample,y=count, fill=phylum, group=phylum)) + #grouping enables keeping the same sorting of taxonomic units
    geom_bar(stat="identity", colour="white", linewidth=0.1) + #plot stacked bars with white borders
    scale_fill_manual(values=phylum_colors) +
    facet_nested(~factor(Species, labels=c("Eb" = "Eptesicus", "Ha" = "Hypsugo", "Pk" = "Pipistrellus")),  scales="free") + #facet per day and treatment
    guides(fill = guide_legend(ncol = 1)) +
    theme(axis.text.x = element_blank(),
          axis.ticks.x = element_blank(),
          axis.title.x = element_blank(),
          panel.background = element_blank(),
          panel.border = element_blank(),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          strip.background = element_rect(fill = "white"),
          strip.text = element_text(size = 12, lineheight = 0.6,face="bold"),
          axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black")) +
   labs(fill="Phylum",y = "Relative abundance",x="Samples")

Phylum relative abundances

phylum_summary <- genome_counts_filt %>%
  mutate_at(vars(-genome),~./sum(.)) %>%
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
  left_join(sample_metadata, by = join_by(sample == sample)) %>%
  left_join(genome_metadata, by = join_by(genome == genome)) %>%
  group_by(sample,phylum,Species) %>%
  summarise(relabun=sum(count))
phylum_summary %>%
    group_by(phylum) %>%
    summarise(Total_mean=mean(relabun*100, na.rm=T),
              Total_sd=sd(relabun*100, na.rm=T),
              Eb_mean=mean(relabun[Species=="Eb"]*100, na.rm=T),
              Eb_sd=sd(relabun[Species=="Eb"]*100, na.rm=T),
              Ha_mean=mean(relabun[Species=="Ha"]*100, na.rm=T),
              Ha_sd=sd(relabun[Species=="Ha"]*100, na.rm=T),
              Pk_mean=mean(relabun[Species=="Pk"]*100, na.rm=T),
              Pk_sd=sd(relabun[Species=="Pk"]*100, na.rm=T)) %>%
    mutate(Total=str_c(round(Total_mean,3),"±",round(Total_sd,3)),
           Eptesicus=str_c(round(Eb_mean,3),"±",round(Eb_sd,3)),
           Hypsugo=str_c(round(Ha_mean,3),"±",round(Ha_sd,3)),
           Pipistrellus=str_c(round(Pk_mean,3),"±",round(Pk_sd,3))) %>% 
  arrange(-Eb_mean)%>% 
    dplyr::select(phylum,Total,Eptesicus,Hypsugo,Pipistrellus)
# A tibble: 30 × 5
   phylum               Total         Eptesicus     Hypsugo       Pipistrellus 
   <chr>                <chr>         <chr>         <chr>         <chr>        
 1 Pseudomonadota       58.806±25.926 53.499±30.532 67.515±18.835 53.697±28.06 
 2 Bacillota            26.442±20.944 25.488±23.748 22.918±16.066 29.919±23.604
 3 Bacteroidota         4.876±7.757   5.588±8.456   5.332±8.486   4.158±7.066  
 4 Fusobacteriota       1.831±4.276   5.054±6.787   0.382±0.927   1.618±4.021  
 5 Desulfobacterota     2.107±4.594   4.449±5.799   0.327±0.938   2.58±5.419   
 6 Patescibacteria      0.48±2.558    2.198±5.658   0.052±0.151   0.069±0.295  
 7 Rs-K70 termite group 0.946±2.852   1.378±3.222   0.372±1.304   1.246±3.603  
 8 Synergistota         0.364±0.935   0.671±1.257   0.167±0.704   0.394±0.948  
 9 Planctomycetota      0.177±0.475   0.602±0.815   0.016±0.059   0.123±0.371  
10 Campylobacterota     1.088±6.268   0.528±0.911   0.265±0.675   2.053±9.542  
# ℹ 20 more rows

7.3.1.2 Family level

Percentange of families in each group

family_summary <- genome_counts_filt %>%
  mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>% #reduce to minimum number of columns
  left_join(sample_metadata, by = join_by(sample == sample)) %>% #append sample metadata
  left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
  group_by(sample,family, Species) %>%
  summarise(relabun=sum(count))
family_summary %>%
    group_by(family) %>%
    summarise(Eb_mean=mean(relabun[Species=="Eb"]*100, na.rm=T),
              Eb_sd=sd(relabun[Species=="Eb"]*100, na.rm=T),
              Ha_mean=mean(relabun[Species=="Ha"]*100, na.rm=T),
              Ha_sd=sd(relabun[Species=="Ha"]*100, na.rm=T),
              Pk_mean=mean(relabun[Species=="Pk"]*100, na.rm=T),
              Pk_sd=sd(relabun[Species=="Pk"]*100, na.rm=T)) %>%
    mutate(Eptesicus=str_c(round(Eb_mean,3),"±",round(Eb_sd,3)),
           Hypsugo=str_c(round(Ha_mean,3),"±",round(Ha_sd,3)),
           Pipistrellus=str_c(round(Pk_mean,3),"±",round(Pk_sd,3))) %>% 
  arrange(-Pk_mean) %>% 
    dplyr::select(family,Eptesicus,Hypsugo,Pipistrellus) %>% 
  left_join(., genome_metadata[c(3,6)] %>% unique(), by=join_by(family==family))
# A tibble: 285 × 5
   family              Eptesicus     Hypsugo      Pipistrellus  phylum        
   <chr>               <chr>         <chr>        <chr>         <chr>         
 1 Enterobacteriaceae  26.677±31.648 8.829±13.578 13.234±22.962 Pseudomonadota
 2 Morganellaceae      0.286±0.614   4.136±6.821  9.526±19.046  Pseudomonadota
 3 Yersiniaceae        0.223±0.478   9.419±14.295 6.631±18.115  Pseudomonadota
 4 Enterococcaceae     6.975±10.452  2.884±4.689  5.421±10.609  Bacillota     
 5 Streptococcaceae    0.518±0.904   2.154±3.462  4.843±15.641  Bacillota     
 6 Diplorickettsiaceae 14.235±27.678 0.202±0.498  4.667±20.883  Pseudomonadota
 7 Bacillaceae         0.463±0.701   7.36±9.57    4.639±14.907  Bacillota     
 8 Lachnospiraceae     5.118±11.827  2.331±5.594  4.137±8.837   Bacillota     
 9 Rickettsiaceae      0.048±0.147   6.597±15.049 4.097±13.242  Pseudomonadota
10 Pasteurellaceae     0.443±1.394   4.538±9.865  3.519±6.118   Pseudomonadota
# ℹ 275 more rows
family_arrange <- family_summary %>%
    group_by(family) %>%
    summarise(mean=sum(relabun)) %>%
    arrange(-mean) %>%
    select(family) %>%
    pull()

# Per environment
family_summary %>%
    left_join(genome_metadata %>% select(family,phylum) %>% unique(),by=join_by(family==family)) %>%
    filter(family %in% family_arrange[1:20]) %>%
    mutate(family=factor(family,levels=rev(family_arrange[1:20]))) %>%
    filter(relabun > 0) %>%
    ggplot(aes(x=relabun, y=family, group=family, color=phylum)) +
        scale_color_manual(values=phylum_colors[-8]) +
        geom_jitter(alpha=0.5) + 
        facet_grid(.~Species)+
        theme_minimal() + 
        labs(y="Family", x="Relative abundance", color="Phylum")

7.3.1.3 Genus level

Percetange of genera in each group

genus_summary <- genome_counts_filt %>%
  mutate_at(vars(-genome),~./sum(.)) %>% 
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>% 
  left_join(sample_metadata, by = join_by(sample == sample)) %>% 
  left_join(genome_metadata, by = join_by(genome == genome)) %>% 
  group_by(sample,phylum,genus, Species) %>%
  summarise(relabun=sum(count)) 

genus_summary %>%
    group_by(genus) %>%
    summarise(Eb_mean=mean(relabun[Species=="Eb"]*100, na.rm=T),
              Eb_sd=sd(relabun[Species=="Eb"]*100, na.rm=T),
              Ha_mean=mean(relabun[Species=="Ha"]*100, na.rm=T),
              Ha_sd=sd(relabun[Species=="Ha"]*100, na.rm=T),
              Pk_mean=mean(relabun[Species=="Pk"]*100, na.rm=T),
              Pk_sd=sd(relabun[Species=="Pk"]*100, na.rm=T)) %>%
    mutate(Eptesicus=str_c(round(Eb_mean,3),"±",round(Eb_sd,3)),
           Hypsugo=str_c(round(Ha_mean,3),"±",round(Ha_sd,3)),
           Pipistrellus=str_c(round(Pk_mean,3),"±",round(Pk_sd,3))) %>% 
  arrange(-Pk_mean) %>% 
    dplyr::select(genus,Eptesicus,Hypsugo,Pipistrellus) %>% 
  left_join(., genome_metadata[c(3,7)] %>% unique(), by=join_by(genus==genus))
# A tibble: 565 × 5
   genus             Eptesicus     Hypsugo      Pipistrellus phylum        
   <chr>             <chr>         <chr>        <chr>        <chr>         
 1 Serratia          0.223±0.478   9.05±14.401  6.493±18.155 Pseudomonadota
 2 Morganella        0.015±0.033   2.013±6.097  5.672±14.831 Pseudomonadota
 3 Enterococcus      6.975±10.452  2.884±4.688  5.418±10.61  Bacillota     
 4 Lactococcus       0.514±0.897   2.037±3.503  4.83±15.636  Bacillota     
 5 Bacillus          0.437±0.69    7.274±9.497  4.625±14.907 Bacillota     
 6 Rickettsiella     14.234±27.674 0.11±0.439   4.463±20.908 Pseudomonadota
 7 Rickettsia        0.048±0.147   6.597±15.049 4.097±13.242 Pseudomonadota
 8 Vibrio            3.254±10.076  5.492±8.443  3.046±11.078 Pseudomonadota
 9 Enterobacter      15.407±26.424 4.97±10.782  2.813±6.493  Pseudomonadota
10 Lachnoclostridium 3.64±9.837    0.525±2.014  2.806±7.312  Bacillota     
# ℹ 555 more rows
genus_summary_sort <- genus_summary %>%
    group_by(genus) %>%
    summarise(mean=mean(relabun, na.rm=T),sd=sd(relabun, na.rm=T)) %>%
    arrange(-mean) 

genus_summary %>%
  mutate(genus=factor(genus, levels=rev(genus_summary_sort %>% pull(genus)))) %>%
  filter(relabun > 0) %>%
  ggplot(aes(x=relabun, y=genus, group=genus, color=phylum)) +
  scale_color_manual(values=phylum_colors) +
  geom_jitter(alpha=0.5) + 
  facet_grid(.~Species)+ 
  theme_minimal() + 
  theme(axis.text.y = element_text(size=6))+
  labs(y="Genera", x="Relative abundance", color="Phylum")

Number of mags and distinct taxonomy

bats=c("Eb", "Pk", "Ha")

total_mags <- data.frame(
  Bat = character(),
  MAGs = numeric(), 
  Phylum = numeric(),
  Family = numeric(),
  Genus = numeric()
)

preabs_table <- genome_counts_filt %>%
  mutate(across(-genome, ~ . / sum(.))) %>%
  column_to_rownames("genome") %>%
  mutate(across(everything(), ~ as.integer(. > 0))) %>%
  t() %>%
  as.data.frame() %>%
  rownames_to_column("sample") %>%
  left_join(sample_metadata[c("sample", "Species")], by = "sample") %>%
  group_by(Species) %>%
  summarize(across(-sample, sum), .groups = "drop") %>%
  column_to_rownames("Species") %>%
  t() %>%
  as.data.frame() %>%
  rownames_to_column("genome")  %>% 
  left_join(genome_metadata, by=join_by("genome"=="genome"))

phylum <- preabs_table %>% 
  distinct(phylum)

family <- preabs_table %>% 
  distinct(phylum, class, order, family)

genus <- preabs_table %>% 
  distinct(phylum, class, order, family, genus)

 total_mags <- rbind(
    total_mags,
    data.frame(
      Bat = "Total",
      MAGs = nrow(preabs_table),
      Phylum = nrow(phylum),
      Family = nrow(family),
      Genus = nrow(genus)
      )
  )

for (bat in bats) { 
  number <- preabs_table %>% 
    select({{bat}}) %>% 
    filter(.>=1)
  
  phylum <- preabs_table %>% 
     select({{bat}}, phylum) %>%
     filter(!!sym(bat)>=1) %>% 
     distinct(phylum)
  
  family <- preabs_table %>% 
     select({{bat}}, phylum, class, order, family) %>%
     filter(!!sym(bat)>=1) %>% 
     distinct(phylum, class, order, family)
  
  genus <- preabs_table %>% 
     select({{bat}}, phylum, class, order, family, genus) %>%
     filter(!!sym(bat)>=1) %>% 
     distinct(phylum, class, order, family, genus)
   
  total_mags <- rbind(
    total_mags,
    data.frame(
      Bat = bat,
      MAGs = nrow(number),
      Phylum = nrow(phylum),
      Family = nrow(family),
      Genus = nrow(genus)
      )
  )
}
bats=c("Eb", "Pk", "Ha")

no_annotation <- data.frame(
  Bat = character(),
  No_genus = numeric(), 
  No_species = numeric()
)

preabs_table <- genome_counts_filt %>%
  mutate(across(-genome, ~ . / sum(.))) %>%
  column_to_rownames("genome") %>%
  mutate(across(everything(), ~ as.integer(. > 0))) %>%
  t() %>%
  as.data.frame() %>%
  rownames_to_column("sample") %>%
  left_join(sample_metadata[c("sample", "Species")], by = "sample") %>%
  group_by(Species) %>%
  summarize(across(-sample, sum), .groups = "drop") %>%
  column_to_rownames("Species") %>%
  t() %>%
  as.data.frame() %>%
  rownames_to_column("genome")  %>% 
  left_join(genome_metadata, by=join_by("genome"=="genome"))

genus <- preabs_table  %>%
    filter(is.na(genus))

species <- preabs_table  %>%
    filter(is.na(species))

 no_annotation <- rbind(
    no_annotation,
    data.frame(
      Bat = "Total",
      No_genus = nrow(genus),
      No_species = nrow(species)
      )
  )

for (bat in bats) { 
  number <- preabs_table %>% 
    select({{bat}}) %>% 
    filter(.>=1)
  
  genus <- preabs_table %>% 
     select({{bat}}, phylum, class, order, family, genus) %>%
     filter(!!sym(bat)>=1) %>%
    filter(is.na(genus))
  
  species <- preabs_table %>% 
     filter(!!sym(bat)>=1) %>%
    filter(is.na(species))
   
  no_annotation <- rbind(
    no_annotation,
    data.frame(
      Bat = bat,
      No_genus = nrow(genus),
      No_species = nrow(species)
      )
  )
}

Total percentage of MAGs without genus-level annotation

nongenera <- genome_metadata %>%
  filter(is.na(genus)) %>%
  summarize(Mag_nogenera = n()) %>% 
  pull()
nmags <- total_mags %>% 
  filter(Bat=="Total") %>% 
  select(MAGs) %>% 
  pull()
perct <- nongenera*100/nmags
cat(perct)
32.13354

Percentage of MAGs without genus-level annotation by phylum

total_mag_phylum <- genome_metadata %>%
  group_by(phylum) %>%
  summarize(Total_MAGs = n())
genome_metadata %>%
  filter(is.na(genus)) %>%
  group_by(phylum) %>%
  summarize(MAGs_nogenus = n()) %>% 
  left_join(total_mag_phylum, by = join_by(phylum == phylum)) %>% 
  mutate(Percentage_nogenus=100*MAGs_nogenus/Total_MAGs) %>% 
  tt()
phylum MAGs_nogenus Total_MAGs Percentage_nogenus
Actinomycetota 72 251 28.685259
Apal-E12 1 1 100.000000
Armatimonadota 1 1 100.000000
Bacillota 504 1334 37.781109
Bacteroidota 92 490 18.775510
Bdellovibrionota 4 9 44.444444
Chloroflexi 8 9 88.888889
Crenarchaeota 1 7 14.285714
Cyanobacteriota 18 53 33.962264
Dependentiae 1 1 100.000000
Desulfobacterota 41 185 22.162162
Halobacterota 1 35 2.857143
Myxococcota 2 5 40.000000
Patescibacteria 38 60 63.333333
Planctomycetota 60 70 85.714286
Pseudomonadota 349 1176 29.676871
Rs-K70 termite group 16 16 100.000000
Spirochaetota 2 7 28.571429
Synergistota 13 28 46.428571
Thermoplasmatota 2 3 66.666667
Verrucomicrobiota 6 35 17.142857

Number of bacterial species

genome_metadata %>% 
  filter(domain == "Bacteria")%>%
  dplyr::select(species) %>%
  unique() %>%
  pull() %>%
  length() %>% 
  cat()
162

Total percentage of MAGs without species-level annotation

nonspecies <- genome_metadata %>%
  filter(species == "") %>%
  summarize(Mag_nospecies = n()) %>% 
  pull()
perct <- nonspecies*100/nmags
cat(perct)
0

MAGs without species-level annotation

total_mag_phylum <- genome_metadata %>%
  group_by(phylum) %>%
  summarize(MAGs_total = n())
genome_metadata %>%
  filter(is.na(species)) %>%
  group_by(phylum) %>%
  summarize(MAGs_nospecies = n()) %>% 
  left_join(total_mag_phylum, by = join_by(phylum == phylum)) %>%
  mutate(species_annotated=MAGs_total-MAGs_nospecies) %>% 
  mutate(Percentage_nospecies=100*MAGs_nospecies/MAGs_total) %>% 
  mutate(Percentage_species=100-100*MAGs_nospecies/MAGs_total)%>% 
  tt()
phylum MAGs_nospecies MAGs_total species_annotated Percentage_nospecies Percentage_species
Actinomycetota 228 251 23 90.83665 9.163347
Apal-E12 1 1 0 100.00000 0.000000
Armatimonadota 1 1 0 100.00000 0.000000
Bacillota 1288 1334 46 96.55172 3.448276
Bacteroidota 468 490 22 95.51020 4.489796
Bdellovibrionota 9 9 0 100.00000 0.000000
Campylobacterota 10 13 3 76.92308 23.076923
Chloroflexi 9 9 0 100.00000 0.000000
Crenarchaeota 7 7 0 100.00000 0.000000
Cyanobacteriota 51 53 2 96.22642 3.773585
Deferribacterota 9 9 0 100.00000 0.000000
Deinococcota 5 5 0 100.00000 0.000000
Dependentiae 1 1 0 100.00000 0.000000
Desulfobacterota 185 185 0 100.00000 0.000000
Elusimicrobiota 5 5 0 100.00000 0.000000
Euryarchaeota 3 3 0 100.00000 0.000000
Fusobacteriota 15 19 4 78.94737 21.052632
Halobacterota 35 35 0 100.00000 0.000000
Myxococcota 5 5 0 100.00000 0.000000
Nanohaloarchaeota 2 2 0 100.00000 0.000000
Patescibacteria 60 60 0 100.00000 0.000000
Planctomycetota 70 70 0 100.00000 0.000000
Pseudomonadota 1082 1176 94 92.00680 7.993197
Rs-K70 termite group 16 16 0 100.00000 0.000000
Spirochaetota 7 7 0 100.00000 0.000000
Sumerlaeota 1 1 0 100.00000 0.000000
Synergistota 28 28 0 100.00000 0.000000
Thermoplasmatota 3 3 0 100.00000 0.000000
Verrucomicrobiota 35 35 0 100.00000 0.000000

ASVs in single species

bats=c("Eb", "Pk", "Ha")

# Initialize an empty results data frame
single_sp <- data.frame(
  Bat = character(),
  Single_species = numeric()
)

table_upset_analysis <- genome_counts_filt %>%
  mutate(across(-genome, ~ . / sum(.))) %>%
  column_to_rownames("genome") %>%
  mutate(across(everything(), ~ as.integer(. > 0))) %>%
  t() %>%
  as.data.frame() %>%
  rownames_to_column("sample") %>%
  left_join(sample_metadata[c("sample", "Species")], by = "sample") %>%
  group_by(Species) %>%
  summarize(across(-sample, sum), .groups = "drop") %>%
  column_to_rownames("Species") %>%
  mutate(across(everything(), ~ as.integer(. > 0))) %>% 
  t() %>%
  as.data.frame() %>%
  rownames_to_column("sample")

unique_all <- table_upset_analysis %>% 
      filter(rowSums(across(Eb:Pk)) == 1)

    single_sp <- rbind(
    single_sp,
    data.frame(
      Bat = "Total",  # Label for the total value
      Single_species = nrow(unique_all) # Aggregate sum of column sums
    )
  )
  
  for (bat in bats) {  
    unique <- table_upset_analysis %>%
      filter(rowSums(across(Eb:Pk)) == 1) %>% 
      select(bat) %>% 
      filter(.>0) %>% 
      nrow()
    # Add results to the results data frame
    single_sp <- rbind(
      single_sp,
      data.frame(
        Bat = bat,
        Single_species = unique # Aggregate sum of column sums
      )
    )
  }

ASVs in a single individual

single_ind <- data.frame(
  Bat = character(),
  Single_individual = numeric()
)

 singleton_filt <- freq_table %>%
  rowwise() %>%
  mutate(row_sum = sum(c_across(-asv))) %>% # Calculate row sum for specific columns
  filter(row_sum == 1) %>% 
  column_to_rownames(var = "asv")  %>% 
  filter(row_sum==1)

     single_ind <- rbind(
      single_ind,
      data.frame(
        Bat = "Total",
        Single_individual = nrow(singleton_filt) # Aggregate sum of column sums
      )
    )
     
for (bat in bats) {
  singleton_filt <- freq_table %>%
    rowwise() %>%
    mutate(row_sum = sum(c_across(-asv))) %>% 
    filter(row_sum == 1) %>% 
    column_to_rownames(var = "asv")  %>% 
    select(bat) %>% 
    filter(.==1)

      single_ind <- rbind(
      single_ind,
      data.frame(
        Bat = bat,
        Single_individual = nrow(singleton_filt) # Aggregate sum of column sums
      )
    )
  }

7.4 Summary table

summary_table <- total_mags %>% 
  left_join(., no_annotation, by="Bat") %>% 
  left_join(., single_ind, by="Bat") %>% 
  left_join(., single_sp, by="Bat")
summary_table
    Bat MAGs Phylum Family Genus No_genus No_species Single_individual Single_species
1 Total 3834     30    328   732     1232       3639                34           3387
2    Eb 1381     19    158   294      481       1326                14           1093
3    Pk 1523     25    217   408      511       1443                11           1140
4    Ha 1500     24    239   496      361       1387                 9           1154
summary_table %>% 
  select(-Phylum,-Family, -Genus) %>% 
  rowwise() %>% 
  mutate(No_genus_perc=No_genus*100/MAGs)%>% 
  mutate(No_species_perc=No_species*100/MAGs) %>% 
  mutate(Single_individual_perc=Single_individual*100/MAGs)%>% 
  mutate(Single_species_perc=Single_species*100/MAGs) %>% 
  mutate(Single_individual_per_unique=Single_individual*100/Single_species) %>% 
  select(1,6:11)
# A tibble: 4 × 7
# Rowwise: 
  Bat   Single_species No_genus_perc No_species_perc Single_individual_perc Single_species_perc Single_individual_per_unique
  <chr>          <int>         <dbl>           <dbl>                  <dbl>               <dbl>                        <dbl>
1 Total           3387          32.1            94.9                  0.887                88.3                        1.00 
2 Eb              1093          34.8            96.0                  1.01                 79.1                        1.28 
3 Pk              1140          33.6            94.7                  0.722                74.9                        0.965
4 Ha              1154          24.1            92.5                  0.6                  76.9                        0.780